/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.nutch.util;

import java.util.Arrays;
import java.util.LinkedList;
import java.util.ListIterator;

/**
 * TrieStringMatcher is a base class for simple tree-based string matching.
 * 
 */
public abstract class TrieStringMatcher {
	protected TrieNode root;

	protected TrieStringMatcher() {
		this.root = new TrieNode('\000', false);
	}

	/**
	 * Node class for the character tree.
	 */
	protected class TrieNode implements Comparable<TrieNode> {
		protected TrieNode[] children;
		protected LinkedList<TrieNode> childrenList;
		protected char nodeChar;
		protected boolean terminal;

		/**
		 * Creates a new TrieNode, which contains the given
		 * <code>nodeChar</code>. If <code>isTerminal</code> is
		 * <code>true</code>, the new node is a <em>terminal</em> node in the
		 * trie.
		 */
		TrieNode(char nodeChar, boolean isTerminal) {
			this.nodeChar = nodeChar;
			this.terminal = isTerminal;
			this.childrenList = new LinkedList<TrieNode>();
		}

		/**
		 * Returns <code>true</code> if this node is a <em>terminal</em> node in
		 * the trie.
		 */
		boolean isTerminal() {
			return terminal;
		}

		/**
		 * Returns the child node of this node whose node-character is
		 * <code>nextChar</code>. If no such node exists, one will be is added.
		 * If <em>isTerminal</em> is <code>true</code>, the node will be a
		 * terminal node in the trie.
		 */
		TrieNode getChildAddIfNotPresent(char nextChar, boolean isTerminal) {
			if (childrenList == null) {
				childrenList = new LinkedList<TrieNode>();
				childrenList.addAll(Arrays.asList(children));
				children = null;
			}

			if (childrenList.size() == 0) {
				TrieNode newNode = new TrieNode(nextChar, isTerminal);
				childrenList.add(newNode);
				return newNode;
			}

			ListIterator<TrieNode> iter = childrenList.listIterator();
			TrieNode node = iter.next();
			while ((node.nodeChar < nextChar) && iter.hasNext())
				node = iter.next();

			if (node.nodeChar == nextChar) {
				node.terminal = node.terminal | isTerminal;
				return node;
			}

			if (node.nodeChar > nextChar)
				iter.previous();

			TrieNode newNode = new TrieNode(nextChar, isTerminal);
			iter.add(newNode);
			return newNode;
		}

		/**
		 * Returns the child node of this node whose node-character is
		 * <code>nextChar</code>. If no such node exists, <code>null</code> is
		 * returned.
		 */
		TrieNode getChild(char nextChar) {
			if (children == null) {
				children = childrenList
						.toArray(new TrieNode[childrenList.size()]);
				childrenList = null;
				Arrays.sort(children);
			}

			int min = 0;
			int max = children.length - 1;
			int mid = 0;
			while (min < max) {
				mid = (min + max) / 2;
				if (children[mid].nodeChar == nextChar)
					return children[mid];
				if (children[mid].nodeChar < nextChar)
					min = mid + 1;
				else
					// if (children[mid].nodeChar > nextChar)
					max = mid - 1;
			}

			if (min == max)
				if (children[min].nodeChar == nextChar)
					return children[min];

			return null;
		}

		public int compareTo(TrieNode other) {
			if (this.nodeChar < other.nodeChar)
				return -1;
			if (this.nodeChar == other.nodeChar)
				return 0;
			// if (this.nodeChar > other.nodeChar)
			return 1;
		}
	}

	/**
	 * Returns the next {@link TrieNode} visited, given that you are at
	 * <code>node</code>, and the the next character in the input is the
	 * <code>idx</code>'th character of <code>s</code>.
	 */
	protected final TrieNode matchChar(TrieNode node, String s, int idx) {
		return node.getChild(s.charAt(idx));
	}

	/**
	 * Adds any necessary nodes to the trie so that the given
	 * <code>String</code> can be decoded and the last character is represented
	 * by a terminal node. Zero-length <code>Strings</code> are ignored.
	 */
	protected final void addPatternForward(String s) {
		TrieNode node = root;
		int stop = s.length() - 1;
		int i;
		if (s.length() > 0) {
			for (i = 0; i < stop; i++)
				node = node.getChildAddIfNotPresent(s.charAt(i), false);
			node = node.getChildAddIfNotPresent(s.charAt(i), true);
		}
	}

	/**
	 * Adds any necessary nodes to the trie so that the given
	 * <code>String</code> can be decoded <em>in reverse</em> and the first
	 * character is represented by a terminal node. Zero-length
	 * <code>Strings</code> are ignored.
	 */
	protected final void addPatternBackward(String s) {
		TrieNode node = root;
		if (s.length() > 0) {
			for (int i = s.length() - 1; i > 0; i--)
				node = node.getChildAddIfNotPresent(s.charAt(i), false);
			node = node.getChildAddIfNotPresent(s.charAt(0), true);
		}
	}

	/**
	 * Returns true if the given <code>String</code> is matched by a pattern in
	 * the trie
	 */
	public abstract boolean matches(String input);

	/**
	 * Returns the shortest substring of <code>input<code> that is matched by a
	 * pattern in the trie, or <code>null<code> if no match exists.
	 */
	public abstract String shortestMatch(String input);

	/**
	 * Returns the longest substring of <code>input<code> that is matched by a
	 * pattern in the trie, or <code>null<code> if no match exists.
	 */
	public abstract String longestMatch(String input);

}
